Author: Amanda Everitt
Began: 8/22/2018
Finished:: 8/23/2018
At the conclusion of the last script we ran 5 different DEX comparisons - Neuronal group 2 vs group 1 - Neuronal WT vs Neuronal NULL - Neuronal WT vs Neuronal HET - Tbr1+ Neuronal WT vs Tbr1+ Neuronal NULL - Tbr1+ Neuronal WT vs Tbr1+ Neuronal HET
Lets see how the compare to each other and if there is a Tbr1 dosage effect
full_list <- list()
DE_list <- list()
hcDE_list <- list()
GENEoverlap = data.frame(Gene=character(), File=character())
for (i in list.files(path=paste0(wd,"/outputs/output_04"), pattern="*.csv", full.names = T)){
name = tools::file_path_sans_ext(basename(i))
cat(name,"\n")
full.file = read.csv(i, row.names = 1)
rownames(full.file) <- full.file$gene
full_list[[name]] <- full.file
dex.file = full.file[full.file$fdr < 0.05, ]
cat(nrow(dex.file), "dex genes\n")
DE_list[[name]] <- dex.file
write.csv(dex.file, paste0(out_dir, "/DE_", name, ".csv"))
hc.dex.file = dex.file[abs(dex.file$logfc) > 1 & is.na(dex.file$logfc) == FALSE, ]
cat(nrow(hc.dex.file), "dex genes logfc > or < 1\n")
hcDE_list[[name]] <- hc.dex.file
write.csv(hc.dex.file, paste0(out_dir, "/hcDE", name, ".csv"))
temp <- data.frame(Gene=dex.file$gene,
File=rep(name, length(rownames(dex.file))),
Direction=ifelse(dex.file$logfc >0 , 'UP', 'DOWN')
)
GENEoverlap <- merge(x=GENEoverlap, y=temp, all.x=TRUE, all.y=TRUE)
}
## Group2vsGroup1
## 292 dex genes
## 46 dex genes logfc > or < 1
## Tbr1_WTvsTbr1_HET
## 8 dex genes
## 7 dex genes logfc > or < 1
## Tbr1_WTvsTbr1_NULL
## 8 dex genes
## 8 dex genes logfc > or < 1
## WTvsHET
## 320 dex genes
## 51 dex genes logfc > or < 1
## WTvsNULL
## 470 dex genes
## 44 dex genes logfc > or < 1
GENEoverlap <- ddply(GENEoverlap, .(Gene,Direction), summarize, Count=length(unique(File)), Files=paste(unique(File),collapse=","))
write.csv(GENEoverlap, paste0(out_dir,"/overlapping_gene.csv"),row.names = FALSE)
## NULL
head(GENEoverlap[GENEoverlap$Count == 5, 1:2])
## Gene Direction
## 82 Calm2 UP
## 192 Fau DOWN
## 537 Tpt1 DOWN
## NULL
genes29 <- intersect(intersect(rownames(hcDE_list[["Group2vsGroup1"]]),
rownames(hcDE_list[["WTvsNULL"]])),
rownames(hcDE_list[["WTvsHET"]]))
GENEoverlap[GENEoverlap$Gene %in% genes29, c(1:2)]
## Gene Direction
## 13 Actg1 DOWN
## 47 Atp5g2 DOWN
## 49 Atp5h DOWN
## 82 Calm2 UP
## 121 Cox7b DOWN
## 159 Eef1a1 DOWN
## 163 Eef1g DOWN
## 186 Fabp5 DOWN
## 192 Fau DOWN
## 206 Gapdh DOWN
## 222 Gpx4 DOWN
## 252 Hsp90ab1 DOWN
## 268 Kif1a DOWN
## 289 Malat1 DOWN
## 293 Mapt DOWN
## 295 Marcksl1 UP
## 310 Mllt11 UP
## 325 Naca DOWN
## 343 Ndufb9 DOWN
## 352 Nnat DOWN
## 370 Pcp4 DOWN
## 373 Pebp1 DOWN
## 375 Pfdn5 DOWN
## 411 Ptma DOWN
## 485 Snrpn UP
## 506 Stmn3 DOWN
## 514 Tbca DOWN
## 537 Tpt1 DOWN
## 556 Ubc UP
## 568 Uqcrh DOWN
## 576 Vamp2 UP
bulk_L6 <- read.table("../raw_data/L6_Null_vs_L6_WT-_List_of_significant_genes.txt", header = T)
vennPlot1 = venn.diagram(list(single_cell_L5_NULL = rownames(DE_list[["WTvsNULL"]]),
bulk_seq_L6_NULL = bulk_L6$GeneID),
fill = c("blue", "red"), main="Overlapping DEX with L6 bulk paper",
alpha = c(0.5, 0.5), cex = 2, filename = NULL)
grid.arrange(grobTree(vennPlot1), ncol=1)
tmp <- bulk_L6[bulk_L6$GeneID %in% intersect(rownames(DE_list[["WTvsNULL"]]), bulk_L6$GeneID), ]
a <- DE_list[["WTvsNULL"]]
tmp2 <- a[a$gene %in% intersect(rownames(DE_list[["WTvsNULL"]]), bulk_L6$GeneID), ]
tmp3 <- merge(tmp[,c("GeneID","FDR","logFC")], tmp2[,c("gene","fdr","logfc")], by.x= "GeneID", by.y="gene", all=T)
colnames(tmp3) <- c("gene","bulk_FDR","bulk_logFC","sc_FDR","sc_logFC")
tmp3
## gene bulk_FDR bulk_logFC sc_FDR sc_logFC
## 1 Crym 6.799982e-03 0.4852461 1.815403e-03 -0.24866363
## 2 Dok5 4.619205e-02 -0.4475063 8.966553e-03 0.24263099
## 3 Eif1b 7.052720e-04 -0.6263932 3.430163e-02 -0.39010912
## 4 Igfbpl1 1.876577e-02 -0.3932694 1.212472e-03 -0.15968406
## 5 Mef2c 8.141727e-04 -0.4510263 7.464419e-05 0.64670311
## 6 Mn1 7.052720e-04 -0.4756535 2.956498e-03 0.21722292
## 7 Nrgn 3.005407e-05 1.4249697 1.130145e-109 2.23785540
## 8 Ptprz1 4.847611e-02 0.6078919 2.030031e-02 -0.30305609
## 9 Rorb 1.028230e-03 -0.8845394 3.822102e-04 0.26131865
## 10 Slc1a3 1.461498e-03 0.4096388 1.841557e-02 -0.31237367
## 11 Slc6a15 8.560334e-03 0.3977337 1.454119e-03 0.20207802
## 12 Tiam2 1.182788e-02 -0.4019967 4.513499e-02 -0.07731485
## 13 Tmsb4x 3.266334e-02 0.1551467 3.640713e-70 -0.49757955
ASD_99 <- read.csv("../raw_data/ASD_100_short.csv", header = F)
NDD_99 <- read.csv("../raw_data/NDD_Genes.csv")
vennPlot1 = venn.diagram(list(DE_single_cell = na.omit(toupper(GENEoverlap$Gene)),
NDD = NDD_99$Gene),
fill = c("blue", "red"), main="Overlapping with NDD list",
alpha = c(0.5, 0.5), cex = 2, filename = NULL)
vennPlot2 = venn.diagram(list(DE_single_cell = na.omit(toupper(GENEoverlap$Gene)),
ASD = ASD_99$V1),
fill = c("blue", "yellow"), main="Overlapping with ASD list",
alpha = c(0.5, 0.5), cex = 2, filename = NULL)
grid.arrange(grobTree(vennPlot1),grobTree(vennPlot2), ncol=1)
GENEoverlap[toupper(GENEoverlap$Gene) %in% intersect(na.omit(toupper(GENEoverlap$Gene)), ASD_99$V1), ]
## Gene Direction Count Files
## 25 Ank2 DOWN 1 WTvsHET
## 30 Ap2s1 DOWN 1 WTvsNULL
## 134 Ctnnb1 DOWN 2 WTvsHET,WTvsNULL
## 151 Dpysl2 UP 1 WTvsNULL
## 290 Map1a DOWN 1 WTvsNULL
## 438 Rorb UP 2 Group2vsGroup1,WTvsNULL
## 471 Smarcc2 DOWN 1 WTvsNULL
GENEoverlap[toupper(GENEoverlap$Gene) %in% intersect(na.omit(toupper(GENEoverlap$Gene)), NDD_99$Gene), ]
## Gene Direction Count Files
## 58 Atxn1 UP 1 WTvsNULL
## 59 Atxn1 <NA> 2 Group2vsGroup1,WTvsHET
## 131 Cst3 DOWN 1 WTvsNULL
## 132 Cst3 UP 2 Group2vsGroup1,WTvsHET
## 261 Ift74 DOWN 1 WTvsNULL
## 293 Mapt DOWN 3 Group2vsGroup1,WTvsHET,WTvsNULL
## 345 Nedd8 DOWN 3 Group2vsGroup1,WTvsHET,WTvsNULL
## 376 Pfn1 UP 3 Group2vsGroup1,Tbr1_WTvsTbr1_NULL,WTvsNULL
## 476 Snca UP 3 Group2vsGroup1,WTvsHET,WTvsNULL
## 477 Sncb UP 3 Group2vsGroup1,WTvsHET,WTvsNULL
## 563 Uchl1 DOWN 3 Group2vsGroup1,WTvsHET,WTvsNULL
uniquetoNULL <- setdiff(rownames(DE_list[["WTvsNULL"]]), rownames(DE_list[["WTvsHET"]]))
uniquetoHET <- setdiff(rownames(DE_list[["WTvsHET"]]), rownames(DE_list[["WTvsNULL"]]))
het.df <- DE_list[["WTvsHET"]]
b<- het.df[uniquetoHET, ]
head(b[order(b$fdr, decreasing = F),], n=20)
## gene Pr..Chisq. fdr logfc
## Tuba1a Tuba1a 1.911346e-20 3.264760e-18 0.1673770
## Tceb2 Tceb2 6.120574e-13 5.629359e-11 -0.7121796
## Marcks Marcks 1.295289e-12 1.133220e-10 -0.8390952
## Ndn Ndn 4.561298e-11 3.676713e-09 -0.3908530
## Tmem256 Tmem256 5.204519e-11 4.148580e-09 -0.7801156
## Sec61g Sec61g 8.864474e-10 6.174149e-08 -0.9917001
## Hint1 Hint1 2.699119e-09 1.792915e-07 -0.8767860
## Minos1 Minos1 3.347817e-09 2.203417e-07 -0.8708139
## Dbi Dbi 4.172841e-09 2.672854e-07 -0.1646555
## Hist1h2al Hist1h2al 1.034930e-08 6.292023e-07 NA
## Qk Qk 2.057214e-08 1.219707e-06 -0.2202969
## Nucks1 Nucks1 3.372360e-08 1.983058e-06 -0.3878726
## Mt1 Mt1 6.952680e-08 3.866552e-06 -0.7155882
## Fdps Fdps 3.360401e-07 1.721966e-05 0.4362562
## Ncald Ncald 3.661267e-07 1.849713e-05 0.2516487
## 2700094K13Rik 2700094K13Rik 5.415229e-07 2.660880e-05 -0.4244777
## Timm8b Timm8b 7.966073e-07 3.835477e-05 -0.7121666
## Glul Glul 1.158288e-06 5.395818e-05 0.6013919
## Nap1l1 Nap1l1 1.241027e-06 5.743955e-05 -0.2586481
## Trappc2l Trappc2l 2.617065e-06 1.173426e-04 -0.2842538
null.df <- DE_list[["WTvsNULL"]]
a<- null.df[uniquetoNULL, ]
head(a[order(a$fdr, decreasing = F),], n=20)
## gene Pr..Chisq. fdr logfc
## Uba52 Uba52 5.207473e-67 5.336916e-64 -2.5052484
## Pfn1 Pfn1 6.618202e-21 1.217410e-18 1.4872062
## Agl Agl 3.205446e-20 5.608748e-18 0.6186134
## AY036118 AY036118 1.109349e-13 1.243511e-11 -0.2515298
## Ybx1 Ybx1 1.737712e-13 1.917899e-11 -0.3990117
## Igfbp7 Igfbp7 2.499205e-12 2.561328e-10 -0.2888790
## Sparc Sparc 9.226961e-12 8.945164e-10 -0.7863769
## R3hdm4 R3hdm4 2.075054e-11 1.958741e-09 -0.8919598
## Dynll1 Dynll1 9.878987e-11 8.337865e-09 0.5010743
## Cbx3 Cbx3 1.737398e-10 1.432655e-08 0.7007783
## Btf3 Btf3 2.676348e-10 2.133346e-08 -0.9204052
## Olig1 Olig1 3.127263e-10 2.465383e-08 -0.7150770
## Usmg5 Usmg5 5.009233e-10 3.823004e-08 0.2355280
## Vps8 Vps8 1.629184e-09 1.157204e-07 -0.5006708
## Pou3f1 Pou3f1 4.001282e-09 2.682729e-07 0.4624765
## Atp5k Atp5k 4.061838e-09 2.698113e-07 0.3756602
## Polr2l Polr2l 4.898630e-09 3.224108e-07 0.6796923
## Anp32b Anp32b 1.259390e-08 7.925322e-07 -0.4574676
## Polr2k Polr2k 4.598316e-08 2.703961e-06 0.5534761
## Zwint Zwint 7.695634e-08 4.416678e-06 0.6169545
possible_dosage <- intersect(rownames(DE_list[["WTvsNULL"]]), rownames(DE_list[["WTvsHET"]]))
a <- merge(null.df[possible_dosage, c("gene","fdr","logfc")], het.df[possible_dosage, c("gene","fdr","logfc")], by = "gene")
colnames(a) <- c("gene","fdr_null","logFC_null","fdr_het","logfc_het")
a$diff <- a$logFC_null - a$logfc_het
head(a[order(abs(a$diff), decreasing = T), ], n=20)
## gene fdr_null logFC_null fdr_het logfc_het diff
## 134 Nrgn 1.130145e-109 2.2378554 1.597285e-08 0.6945417 1.5433137
## 21 Basp1 9.515245e-14 -0.9106342 2.372135e-63 -2.1064983 1.1958641
## 114 Mllt11 8.207806e-20 1.2951943 1.635413e-54 2.3852800 -1.0900857
## 213 Uqcrq 7.534780e-17 0.6426170 2.257054e-06 -0.3696124 1.0122294
## 150 Ptgds 5.222333e-58 -1.3680205 2.476207e-35 -0.5572251 -0.8107953
## 194 Tma7 1.130763e-10 -0.4560004 1.211730e-20 -1.2526005 0.7966001
## 72 Fabp5 6.288732e-41 -1.3970972 5.758390e-62 -2.1288752 0.7317779
## 62 Eef1b2 6.293825e-27 -1.6585304 2.170457e-14 -0.9405753 -0.7179552
## 211 Uqcrb 1.868756e-02 0.2812153 1.004351e-10 0.9841242 -0.7029089
## 206 Ubb 5.457484e-37 -0.5727987 7.773452e-03 0.1139253 -0.6867240
## 53 Cst3 1.745072e-02 -0.2956878 2.390381e-26 0.3896798 -0.6853676
## 94 Hbb-bs 1.724861e-34 -0.9362930 2.488139e-20 -0.2729604 -0.6633326
## 142 Pfn2 5.074791e-03 0.2267480 7.250598e-11 0.8840518 -0.6573038
## 137 Pcp4 5.229309e-35 -2.0342251 5.180682e-17 -1.4143072 -0.6199178
## 74 Fau 4.282121e-101 -1.5124906 4.070989e-151 -2.1048027 0.5923121
## 51 Cpe 9.719924e-03 0.5255107 1.319008e-08 1.0234515 -0.4979409
## 110 Marcksl1 7.081025e-18 1.1158453 1.477159e-32 1.6049053 -0.4890600
## 24 Bola2 4.217817e-02 -0.2971507 2.444771e-07 -0.7841082 0.4869575
## 5 Anapc13 4.554266e-02 -0.1114037 2.426204e-07 -0.5979225 0.4865189
## 91 H3f3a 5.071443e-19 -1.2427567 2.215070e-08 -0.7723629 -0.4703938
## 6704 470
## 6854 320
## 6951 223
## Using Term as id variables
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin17.7.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS/LAPACK: /usr/local/Cellar/openblas/0.3.6_1/lib/libopenblasp-r0.3.6.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 parallel grid stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] org.Mm.eg.db_3.8.2
## [2] Seurat_2.3.4
## [3] Matrix_1.2-17
## [4] cowplot_1.0.0
## [5] GO.db_3.8.2
## [6] ggplot2_3.2.1
## [7] goseq_1.36.0
## [8] geneLenDataBase_1.20.0
## [9] BiasedUrn_1.07
## [10] TxDb.Mmusculus.UCSC.mm10.knownGene_3.4.7
## [11] TxDb.Mmusculus.UCSC.mm10.ensGene_3.4.0
## [12] GenomicFeatures_1.36.4
## [13] AnnotationDbi_1.46.1
## [14] Biobase_2.44.0
## [15] GenomicRanges_1.36.1
## [16] GenomeInfoDb_1.20.0
## [17] IRanges_2.18.3
## [18] S4Vectors_0.22.1
## [19] BiocGenerics_0.30.0
## [20] reshape2_1.4.3
## [21] plyr_1.8.4
## [22] gridExtra_2.3
## [23] VennDiagram_1.6.20
## [24] futile.logger_1.4.3
## [25] knitr_1.25
##
## loaded via a namespace (and not attached):
## [1] snow_0.4-3 backports_1.1.5
## [3] Hmisc_4.2-0 igraph_1.2.4.1
## [5] lazyeval_0.2.2 splines_3.6.0
## [7] BiocParallel_1.18.1 digest_0.6.22
## [9] foreach_1.4.7 htmltools_0.4.0
## [11] lars_1.2 gdata_2.18.0
## [13] magrittr_1.5 checkmate_1.9.4
## [15] memoise_1.1.0 cluster_2.1.0
## [17] mixtools_1.1.0 ROCR_1.0-7
## [19] Biostrings_2.52.0 matrixStats_0.55.0
## [21] R.utils_2.9.0 prettyunits_1.0.2
## [23] colorspace_1.4-1 blob_1.2.0
## [25] xfun_0.10 dplyr_0.8.3
## [27] jsonlite_1.6 crayon_1.3.4
## [29] RCurl_1.95-4.12 zeallot_0.1.0
## [31] zoo_1.8-6 survival_2.44-1.1
## [33] iterators_1.0.12 ape_5.3
## [35] glue_1.3.1 gtable_0.3.0
## [37] zlibbioc_1.30.0 XVector_0.24.0
## [39] DelayedArray_0.10.0 kernlab_0.9-27
## [41] prabclus_2.3-1 DEoptimR_1.0-8
## [43] scales_1.0.0 futile.options_1.0.1
## [45] DBI_1.0.0 bibtex_0.4.2
## [47] Rcpp_1.0.2 metap_1.1
## [49] dtw_1.21-3 progress_1.2.2
## [51] htmlTable_1.13.2 reticulate_1.13
## [53] proxy_0.4-23 foreign_0.8-72
## [55] bit_1.1-14 mclust_5.4.5
## [57] SDMTools_1.1-221.1 Formula_1.2-3
## [59] tsne_0.1-3 htmlwidgets_1.5.1
## [61] httr_1.4.1 gplots_3.0.1.1
## [63] RColorBrewer_1.1-2 fpc_2.2-3
## [65] acepack_1.4.1 modeltools_0.2-22
## [67] ica_1.0-2 pkgconfig_2.0.3
## [69] XML_3.98-1.20 R.methodsS3_1.7.1
## [71] flexmix_2.3-15 nnet_7.3-12
## [73] tidyselect_0.2.5 labeling_0.3
## [75] rlang_0.4.1 munsell_0.5.0
## [77] tools_3.6.0 RSQLite_2.1.2
## [79] ggridges_0.5.1 evaluate_0.14
## [81] stringr_1.4.0 yaml_2.2.0
## [83] npsurv_0.4-0 bit64_0.9-7
## [85] fitdistrplus_1.0-14 robustbase_0.93-5
## [87] caTools_1.17.1.2 purrr_0.3.3
## [89] RANN_2.6.1 pbapply_1.4-2
## [91] nlme_3.1-141 formatR_1.7
## [93] R.oo_1.23.0 biomaRt_2.40.5
## [95] hdf5r_1.3.0 compiler_3.6.0
## [97] rstudioapi_0.10 png_0.1-7
## [99] lsei_1.2-0 tibble_2.1.3
## [101] stringi_1.4.3 lattice_0.20-38
## [103] vctrs_0.2.0 lifecycle_0.1.0
## [105] pillar_1.4.2 lmtest_0.9-37
## [107] Rdpack_0.11-0 irlba_2.3.3
## [109] data.table_1.12.6 bitops_1.0-6
## [111] gbRd_0.4-11 rtracklayer_1.44.4
## [113] R6_2.4.0 latticeExtra_0.6-28
## [115] KernSmooth_2.23-16 codetools_0.2-16
## [117] lambda.r_1.2.4 MASS_7.3-51.4
## [119] gtools_3.8.1 assertthat_0.2.1
## [121] SummarizedExperiment_1.14.1 withr_2.1.2
## [123] GenomicAlignments_1.20.1 Rsamtools_2.0.3
## [125] GenomeInfoDbData_1.2.1 diptest_0.75-7
## [127] mgcv_1.8-30 doSNOW_1.0.18
## [129] hms_0.5.2 rpart_4.1-15
## [131] tidyr_1.0.0 class_7.3-15
## [133] rmarkdown_1.16 segmented_1.0-0
## [135] Rtsne_0.15 base64enc_0.1-3